library(tidyr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.92 loaded
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Abrindo o arquivo

# Read the CSV file `df_arabica_clean.csv` (path is relative to the current
# working directory) into the data frame used by the rest of the script.
df_cafe <- read.csv(file = "df_arabica_clean.csv")

Precisamos fazer uma limpeza na planilha de dados

Separando as variáveis numéricas

Adicionando um corrplot (meio malfeito ainda)

Observando a média e o desvio padrão de cada variável

# Numeric columns used for the correlation plot and the summary statistics.
# They are selected by name rather than by position so the selection keeps
# working if columns are added to, or reordered in, the CSV file.
var_num_cafe <- df_cafe[, c(
  "Aroma", "Flavor", "Aftertaste", "Acidity", "Body", "Balance",
  "Uniformity", "Overall", "Total.Cup.Points", "Moisture.Percentage"
)]

# Pairwise correlation matrix of the selected columns, drawn with corrplot.
corrplot(cor(var_num_cafe))

# Mean of each column; vapply() iterates over the data frame's columns
# directly (no matrix coercion) and guarantees one numeric value per column.
vapply(var_num_cafe, mean, numeric(1))
##               Aroma              Flavor          Aftertaste             Acidity 
##            7.721063            7.744734            7.599758            7.690290 
##                Body             Balance          Uniformity             Overall 
##            7.640918            7.644058            9.990338            7.676812 
##    Total.Cup.Points Moisture.Percentage 
##           83.706570           10.735266
# Standard deviation of each column; vapply() iterates over the data frame's
# columns directly (no matrix coercion) and guarantees one numeric value each.
vapply(var_num_cafe, sd, numeric(1))
##               Aroma              Flavor          Aftertaste             Acidity 
##           0.2876264           0.2796128           0.2759106           0.2595102 
##                Body             Balance          Uniformity             Overall 
##           0.2334994           0.2562992           0.1033064           0.3063589 
##    Total.Cup.Points Moisture.Percentage 
##           1.7304170           1.2474684
# Per-country mean of each score column. across() applies mean() to every
# listed column and names each result "media_<column>", producing one row per
# Country.of.Origin.
df_medias <- df_cafe %>%
  group_by(Country.of.Origin) %>%
  summarise(
    across(
      c(
        Aroma, Flavor, Aftertaste, Acidity, Body, Balance,
        Overall, Total.Cup.Points, Moisture.Percentage
      ),
      mean,
      .names = "media_{.col}"
    )
  )

Corrigindo nomes

# Overwrite the country labels in rows 18 and 21 of the summary table.
# NOTE(review): the rows are addressed by position, so this silently targets
# the wrong countries if the set of countries in the data changes -- confirm
# the row order before re-running on new data.
df_medias[["Country.of.Origin"]][c(18, 21)] <- c("Tanzania", "USA")

# Rename the first column (the grouping column) to "region".
colnames(df_medias)[1] <- "region"
# Interactive bar chart of the mean aroma per country, with bars ordered from
# the highest to the lowest mean. Only the y value is shown in the tooltip.
ggplotly(
  ggplot(data = df_medias) +
    geom_col(
      mapping = aes(x = reorder(region, -media_Aroma), y = media_Aroma),
      fill = "#753D06"
    ) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
    labs(x = "Paises", y = "Media do Aroma") +
    # Rotate the country labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90, hjust = 1)),
  tooltip = "y"
)

Fazendo mapa

# World map polygons; each row carries long/lat coordinates, a polygon `group`
# and a `region` name.
world_map <- map_data("world")

# Attach the per-country means to every polygon row. Regions with no match in
# df_medias get NA in the media_* columns.
world_map <- left_join(world_map, df_medias, by = "region")

# Polygon rows of the regions that have an aroma mean. The column is referenced
# through the data mask instead of `world_map$...`, as dplyr verbs expect.
# Not used by the plot below, which draws every region.
world_map_1 <- world_map %>% filter(!is.na(media_Aroma))

# Choropleth of the mean aroma; regions without data are filled with grey.
mapa_aroma <- ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = media_Aroma), color = "black") +
  scale_fill_gradient(
    name = "Media Aroma",
    low = "#DE9B58",
    high = "#472401",
    na.value = "grey50"
  )

# Static version of the map.
mapa_aroma

# Interactive version; the plot is passed explicitly rather than relying on
# the implicit "last plot" state.
ggplotly(mapa_aroma)